1   package org.apache.lucene.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.io.IOException;
21  import java.util.Collections;
22  import java.util.IdentityHashMap;
23  import java.util.List;
24  import java.util.Set;
25  
/** A {@link CompositeReader} which reads multiple, parallel indexes.  Each
27   * index added must have the same number of documents, and exactly the same
28   * number of leaves (with equal {@code maxDoc}), but typically each contains
29   * different fields. Deletions are taken from the first reader. Each document
30   * contains the union of the fields of all documents with the same document
31   * number.  When searching, matches for a query term are from the first index
32   * added that has the field.
33   *
34   * <p>This is useful, e.g., with collections that have large fields which
35   * change rarely and small fields that change more frequently.  The smaller
36   * fields may be re-indexed in a new index and both indexes may be searched
37   * together.
38   * 
39   * <p><strong>Warning:</strong> It is up to you to make sure all indexes
40   * are created and modified the same way. For example, if you add
41   * documents to one index, you need to add the same documents in the
42   * same order to the other indexes. <em>Failure to do so will result in
43   * undefined behavior</em>.
44   * A good strategy to create suitable indexes with {@link IndexWriter} is to use
 * {@link LogDocMergePolicy}, as this one does not reorder documents
 * during merging (unlike {@code TieredMergePolicy}) and triggers merges
47   * by number of documents per segment. If you use different {@link MergePolicy}s
48   * it might happen that the segment structure of your index is no longer predictable.
49   */
50  public class ParallelCompositeReader extends BaseCompositeReader<LeafReader> {
51    private final boolean closeSubReaders;
52    private final Set<IndexReader> completeReaderSet =
53      Collections.newSetFromMap(new IdentityHashMap<IndexReader,Boolean>());
54  
55    /** Create a ParallelCompositeReader based on the provided
56     *  readers; auto-closes the given readers on {@link #close()}. */
57    public ParallelCompositeReader(CompositeReader... readers) throws IOException {
58      this(true, readers);
59    }
60  
61    /** Create a ParallelCompositeReader based on the provided
62     *  readers. */
63    public ParallelCompositeReader(boolean closeSubReaders, CompositeReader... readers) throws IOException {
64      this(closeSubReaders, readers, readers);
65    }
66  
67    /** Expert: create a ParallelCompositeReader based on the provided
68     *  readers and storedFieldReaders; when a document is
69     *  loaded, only storedFieldsReaders will be used. */
70    public ParallelCompositeReader(boolean closeSubReaders, CompositeReader[] readers, CompositeReader[] storedFieldReaders) throws IOException {
71      super(prepareLeafReaders(readers, storedFieldReaders));
72      this.closeSubReaders = closeSubReaders;
73      Collections.addAll(completeReaderSet, readers);
74      Collections.addAll(completeReaderSet, storedFieldReaders);
75      // update ref-counts (like MultiReader):
76      if (!closeSubReaders) {
77        for (final IndexReader reader : completeReaderSet) {
78          reader.incRef();
79        }
80      }
81      // finally add our own synthetic readers, so we close or decRef them, too (it does not matter what we do)
82      completeReaderSet.addAll(getSequentialSubReaders());
83    }
84  
85    private static LeafReader[] prepareLeafReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
86      if (readers.length == 0) {
87        if (storedFieldsReaders.length > 0)
88          throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
89        return new LeafReader[0];
90      } else {
91        final List<? extends LeafReaderContext> firstLeaves = readers[0].leaves();
92  
93        // check compatibility:
94        final int maxDoc = readers[0].maxDoc(), noLeaves = firstLeaves.size();
95        final int[] leafMaxDoc = new int[noLeaves];
96        for (int i = 0; i < noLeaves; i++) {
97          final LeafReader r = firstLeaves.get(i).reader();
98          leafMaxDoc[i] = r.maxDoc();
99        }
100       validate(readers, maxDoc, leafMaxDoc);
101       validate(storedFieldsReaders, maxDoc, leafMaxDoc);
102 
103       // flatten structure of each Composite to just LeafReader[]
104       // and combine parallel structure with ParallelLeafReaders:
105       final LeafReader[] wrappedLeaves = new LeafReader[noLeaves];
106       for (int i = 0; i < wrappedLeaves.length; i++) {
107         final LeafReader[] subs = new LeafReader[readers.length];
108         for (int j = 0; j < readers.length; j++) {
109           subs[j] = readers[j].leaves().get(i).reader();
110         }
111         final LeafReader[] storedSubs = new LeafReader[storedFieldsReaders.length];
112         for (int j = 0; j < storedFieldsReaders.length; j++) {
113           storedSubs[j] = storedFieldsReaders[j].leaves().get(i).reader();
114         }
115         // We pass true for closeSubs and we prevent touching of subreaders in doClose():
116         // By this the synthetic throw-away readers used here are completely invisible to ref-counting
117         wrappedLeaves[i] = new ParallelLeafReader(true, subs, storedSubs) {
118           @Override
119           protected void doClose() {}
120         };
121       }
122       return wrappedLeaves;
123     }
124   }
125   
126   private static void validate(CompositeReader[] readers, int maxDoc, int[] leafMaxDoc) {
127     for (int i = 0; i < readers.length; i++) {
128       final CompositeReader reader = readers[i];
129       final List<? extends LeafReaderContext> subs = reader.leaves();
130       if (reader.maxDoc() != maxDoc) {
131         throw new IllegalArgumentException("All readers must have same maxDoc: "+maxDoc+"!="+reader.maxDoc());
132       }
133       final int noSubs = subs.size();
134       if (noSubs != leafMaxDoc.length) {
135         throw new IllegalArgumentException("All readers must have same number of leaf readers");
136       }
137       for (int subIDX = 0; subIDX < noSubs; subIDX++) {
138         final LeafReader r = subs.get(subIDX).reader();
139         if (r.maxDoc() != leafMaxDoc[subIDX]) {
140           throw new IllegalArgumentException("All leaf readers must have same corresponding subReader maxDoc");
141         }
142       }
143     }    
144   }
145   
146   @Override
147   protected synchronized void doClose() throws IOException {
148     IOException ioe = null;
149     for (final IndexReader reader : completeReaderSet) {
150       try {
151         if (closeSubReaders) {
152           reader.close();
153         } else {
154           reader.decRef();
155         }
156       } catch (IOException e) {
157         if (ioe == null) ioe = e;
158       }
159     }
160     // throw the first exception
161     if (ioe != null) throw ioe;
162   }
163 }